/*    Copyright 2011 Tobias Marschall
 *
 *    This file is part of MoSDi.
 *
 *    MoSDi is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    MoSDi is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with MoSDi.  If not, see <http://www.gnu.org/licenses/>.
 */

package mosdi.subcommands;

import java.util.List;

import mosdi.fa.FiniteMemoryTextModel;
import mosdi.fa.IIDTextModel;
import mosdi.fa.MarkovianTextModel;
import mosdi.paa.DAA;
import mosdi.paa.MinimizedDAA;
import mosdi.paa.PAA;
import mosdi.paa.TextBasedPAA;
import mosdi.paa.apps.DispensationOrderDAA;
import mosdi.util.Alphabet;
import mosdi.util.BitArray;
import mosdi.util.Log;
import mosdi.util.NamedSequence;
import mosdi.util.SequenceUtils;
import mosdi.util.iterators.StringIterator;

public class BestDispensationOrderSubcommand extends Subcommand {

	@Override
	public String usage() {
		return
		super.usage()+" [options] <dispensation-order-length> <flow-count>\n" +
		"\n" +
		"Enumerates all dispensation orders of the given length\n" +
		"and outputs one with optimal expected read length.\n" +
		"Orders that do not contain all characters and those\n" +
		"with runs of the same character are skipped.\n" +
		"\n" +
		"Options:\n" +
		"  -M <order>: Order of the Markov model. Default: 0 (i.i.d.)\n" +
		"  -t <fasta-file>: Estimate text model from given sequence(s).\n" +
		"  -q <q-gram-table-file>: Estimate text model from given q-gram table.\n" +
		"                          A q-gram table can be created by using\n" +
		"                          \"mosdi-utils count-qgrams\".\n" +
		"  -m <max-length>: maximal read length up to which the distribution is\n" +
		"                   computed (default: 2*<flow-count>).\n" +
		"  -a: print all evaluated dispensation orders, not only the best one.";
	}

	@Override
	public String description() {
		return "Enumerates all dispensation orders of a given length and outputs the best one (in terms of expected read length).";
	}

	@Override
	public String name() {
		return "best-order";
	}

	private void print(String pattern, double expectation, double[] distribution) {
		StringBuffer sb = new StringBuffer();
		sb.append(String.format(">>pattern>> %s >>expectation>> %e >>distribution>>", pattern, expectation));
		for (int i=1; i<distribution.length; ++i) {
			sb.append(String.format(" %e", distribution[i]));
		}
		Log.println(Log.Level.STANDARD, sb.toString());
	}
	
	@Override
	public int run(String[] args) {
		parseOptions(args, 2, "t:M:q:m:a");

		// Option dependencies
		exclusiveOptions("q", "t");
		exclusiveOptions("q", "M");
		impliedOptions("M", "t");
		
		Alphabet alphabet = Alphabet.getDnaAlphabet();

		// Mandatory arguments
		int dispensationOrderLength = getIntArgument(0);
		int flowCount = getIntArgument(1);

		// Options
		int textModelOrder = getNonNegativeIntOption("M", 0);
		String qGramTableFilename = getStringOption("q", null);
		String fastaFilename = getStringOption("t", null);
		int maxReadLength = getNonNegativeIntOption("m", 2*flowCount);
		boolean printAll = getBooleanOption("a", false);
		
		// Create text model
		FiniteMemoryTextModel textModel = null;
		try {
			if (fastaFilename!=null) {
				List<NamedSequence> sequences = SequenceUtils.readFastaFile(fastaFilename, alphabet, true);
				if (textModelOrder==0) {
					textModel = new IIDTextModel(alphabet.size(), SequenceUtils.sequenceList(sequences));
				} else {
					textModel = new MarkovianTextModel(textModelOrder, alphabet.size(), SequenceUtils.sequenceList(sequences));
				}
			}
			if (qGramTableFilename!=null) {
				textModel = SequenceUtils.buildTextModelFromQGramFile(qGramTableFilename);
			}
		} catch (Exception e) {
			Log.errorln(e.toString());
			System.exit(1);
		}
		if (textModel==null) {
			textModel = new IIDTextModel(alphabet.size());
		}

		int[] bestOrder = null;
		double bestExpectation = 0.0;
		double[] bestDistribution = null;
		
		StringIterator iterator = new StringIterator(alphabet.size(), dispensationOrderLength);
		while (iterator.hasNext()) {
			int[] order = iterator.next(); 
			// check for runs of the same character
			boolean skip = false;
			BitArray usedChars = new BitArray(alphabet.size());
			usedChars.set(order[0], true);
			for (int i=1; i<order.length; ++i) {
				usedChars.set(order[i], true);
				if (order[i] == order[i-1]) {
					iterator.skip(i);
					skip = true;
					break;
				}
			}
			if (skip) continue;
			usedChars.invert();
			if (!usedChars.allZero()) continue;
			if (order[0]==order[order.length-1]) continue;
			
			DispensationOrderDAA daa = new DispensationOrderDAA(alphabet.size(), order, flowCount+1);
			DAA minimizedDaa = new MinimizedDAA(daa);
			PAA paa = new TextBasedPAA(minimizedDaa, textModel);
			double[] distribution = paa.waitingTimeForValue(maxReadLength+1, flowCount+1);
			double expectation = 0.0;
			for (int i=0; i<=maxReadLength; ++i) {
				expectation += distribution[i+1] * i;
			}
			if (printAll) {
				print(alphabet.buildString(order), expectation, distribution);
			} else {
				if (expectation>bestExpectation) {
					bestOrder = order;
					bestDistribution = distribution;
					bestExpectation = expectation;
				}
			}
		}
		if (!printAll) {
			print(alphabet.buildString(bestOrder), bestExpectation, bestDistribution);
		}
		return 0;
	}
}
